/*******************************************************************************

Name: Table 3 - Mean system characteristics for treated and untreated 
			    water systems pre- (a) and post-matching (b)

Authors: Jonathan Baker, Lori Bennear, Sheila Olmstead

Input files:
	Primary panel: natlCWS_yr_Panel_mod-small_v2-Match.dta
	Alternate panel (2): natlCWS_yr_Panel_mod-small.dta

Notes: the alternate panel (2) does not contain the results of the matching
	   procedure nor individual violations by rule, but does include an
	   additional variable needed for this table (t0_v_avg).
		
*******************************************************************************/


* ========================== START START START ==========================
clear
clear matrix
set more off

* ______________________________________________________________________________
* set paths and create log file
global root "C:/Users/jbaker/Documents/Research/SDWIS/Analysis/STATA"
global input "$root/Input"
global intermediate "$root/Intermediate"
global log "$root/LogFiles"
global output "$root/Output"
global outpanel "$root/OutputPanel"
global figures "$root/Figures"

log using "$log/Table3_matched-vs-unmatched_summaryStatistics.log", ///
	replace name(panel)
set more off

* ============================================================================== 
* table of violations summary characteristics

use "$outpanel/natlCWS_yr_Panel_mod-small.dta", clear

// Table of system characteristics without matching
keep if id2001==1 & year == 1990
local rows epa_reg surface public purchaser t0_v_avg
local rowmax = 6
mat T = J(`rowmax',11,.)

* populate matrix 
local i = 0
foreach v of varlist `rows' {
	local i = `i' + 1
	* T_501 - - - - - - - - - - - - - - - - - - - - - - - - - - - -
		quietly summarize `v'
		mat T[`i',1] = r(mean)
		quietly ttest `v', by(T_501)
		display "`v' (publishing threshold): p value of difference `r(p)'"
		mat T[`i',2] = r(mu_1)
		mat T[`i',3] = r(mu_2)
		mat T[`i',4] = r(p)
		mat T[`rowmax',1] = `r(N_1)' + `r(N_2)'
		mat T[`rowmax',2] = `r(N_1)'
		mat T[`rowmax',3] = `r(N_2)'
	* T_10k - - - - - - - - - - - - - - - - - - - - - - - - - - - -
		quietly ttest `v', by(T_10k)
		display "`v' (mailing threshold): p value of difference `r(p)'"
		mat T[`i',5] = r(mu_1)
		mat T[`i',6] = r(mu_2)
		mat T[`i',7] = r(p)
		mat T[`rowmax',5] = `r(N_1)'
		mat T[`rowmax',6] = `r(N_2)'
	* T_100k - - - - - - - - - - - - - - - - - - - - - - - - - - - -
		quietly ttest `v', by(T_100k)
		display "`v' (online threshold): p value of difference `r(p)'"
		mat T[`i',8] = r(mu_1)
		mat T[`i',9] = r(mu_2)
		mat T[`i',10] = r(p)
		mat T[`rowmax',8] = `r(N_1)'
		mat T[`rowmax',9] = `r(N_2)'
}
	* output matrix to table
	local rnames "EPA region" "Surface Water" "Public" "Purchaser" "Mean Pre-Treat Viol" "Observations"
	mat rownames T = "`rnames'"
	putexcel set "$output/Table3_matched-vs-unmatched_summaryStatistics.xlsx", ///
	sheet(unmatchedSummary, replace) modify
	putexcel A1 = matrix(T), rownames nformat(number_d2)

// Table of system characteristics with matching
mat Tm = J(`rowmax',11,.)

* populate matrix
	* Full sample - - - - - - - - - - - - - - - - - - - - - - - - - - - -
	use "$outpanel/natlCWS_yr_Panel_mod-small_v2-Match.dta", clear
	keep if id2001==1 & year == 1990
	local i = 0
	foreach v of varlist `rows' {
		local i = `i' + 1
		quietly summarize `v'
		mat Tm[`i',1] = r(mean)
		mat Tm[`rowmax',1] = r(N)
		}
	* T_501 - - - - - - - - - - - - - - - - - - - - - - - - - - - -
	use "$outpanel/natlCWS_yr_Panel_mod-small_v2-Match.dta", clear
	keep if id2001==1 & year == 1990 & mflag501 == 1
	expand freq_501
	sort pwsidNUM
	local i = 0
	foreach v of varlist `rows' {
		local i = `i' + 1
		quietly ttest `v' if mflag501 == 1, by(T_501)
		display "`v' (publishing threshold): p value of difference `r(p)'"
		mat Tm[`i',2] = r(mu_1)
		mat Tm[`i',3] = r(mu_2)
		mat Tm[`i',4] = r(p)
		mat Tm[`rowmax',2] = r(N_1)
		mat Tm[`rowmax',3] = r(N_2)
		}
	// T_10k - - - - - - - - - - - - - - - - - - - - - - - - - - - -
	use "$outpanel/natlCWS_yr_Panel_mod-small_v2-Match.dta", clear
	keep if id2001==1 & year == 1990 & mflag10k == 1
	expand freq_10k
	sort pwsidNUM
	local i = 0
	foreach v of varlist `rows' {
		local i = `i' + 1
		quietly ttest `v' if mflag10k == 1, by(T_10k)
		display "`v' (mailing threshold): p value of difference `r(p)'"
		mat Tm[`i',5] = r(mu_1)
		mat Tm[`i',6] = r(mu_2)
		mat Tm[`i',7] = r(p)
		mat Tm[`rowmax',5] = r(N_1)
		mat Tm[`rowmax',6] = r(N_2)
		}
	// T_100k - - - - - - - - - - - - - - - - - - - - - - - - - - - -
	use "$outpanel/natlCWS_yr_Panel_mod-small_v2-Match.dta", clear
	keep if id2001==1 & year == 1990 & mflag100k == 1
	expand freq_100k
	sort pwsidNUM
	local i = 0
	foreach v of varlist `rows' {
		local i = `i' + 1
		quietly ttest `v' if mflag100k == 1, by(T_100k)
		display "`v' (online threshold): p value of difference `r(p)'"
		mat Tm[`i',8] = r(mu_1)
		mat Tm[`i',9] = r(mu_2)
		mat Tm[`i',10] = r(p)
		mat Tm[`rowmax',8] = r(N_1)
		mat Tm[`rowmax',9] = r(N_2)
	}
	* output matrix to table
	mat rownames Tm = "`rnames'"
	putexcel set ///
	"$output/Table3_matched-vs-unmatched_summaryStatistics.xlsx", ///
	sheet(matchedSummary, replace) modify
	putexcel A1 = matrix(Tm), rownames nformat(number_d2)

	* ========================== DONE DONE DONE ==========================
log close panel
